#Remove history
rm(list=ls())

#Read Israel Data (STATA format) in R & Load Relevant Packages 
setwd("/Users/idahjermitslev/Documents/Maximum Likelihood Estimation/Replication Project")
library(foreign)
isr = read.dta("new2006.dta")
save(isr, file='isr.RData')

attach(isr)
library(splines)
library(survival) 
library(msm) 
library(catspec)
library(MASS)
library(stargazer)
library(xtable)
library(lmtest)
library(sandwich)
library(sbgcop)
library(ggplot2)
library(Amelia)
library(lmtest)
library(reshape2)
library(sandwich)
library(ggthemes)
library(arm)
library(separationplot)

#Correlations Vote and Expectations
votemer<-mat.or.vec(length(count),1)
votelab<-mat.or.vec(length(count),1)
votekad<-mat.or.vec(length(count),1)
votesha<-mat.or.vec(length(count),1)
votelik<-mat.or.vec(length(count),1)
voteisr<-mat.or.vec(length(count),1)
votemaf<-mat.or.vec(length(count),1)
votelab[vote==2]<-1; votelik[vote==5]<-1;
votemer[vote==1]<-1; votesha[vote==4]<-1;
votekad[vote==3]<-1; voteisr[vote==6]<-1;
votemaf[vote==7]<-1;
cor(cbind(votemer,votelab,votekad,votesha,votelik,voteisr,votemaf,colab,cokad,colik,codir),
    use="complete.obs", method = c("pearson"))

x<-cbind(count,choice,1,dis,
         cmer,mercodir,merself,merrelpos,merecopos,merterpos,merage,merfem,meredu,merdense,merrelig,merprefr,merfsu,
         clab,labcodir,labself,labrelpos,labecopos,labterpos,labage,labfem,labedu,labdense,labrelig,labprefr,labfsu,
         ckad,kadcodir,kadself,kadrelpos,kadecopos,kadterpos,kadage,kadfem,kadedu,kaddense,kadrelig,kadprefr,kadfsu,
         csha,shacodir,shaself,sharelpos,shaecopos,shaterpos,shaage,shafem,shaedu,shadense,sharelig,shaprefr,shafsu,
         clik,likcodir,likself,likrelpos,likecopos,likterpos,likage,likfem,likedu,likdense,likrelig,likprefr,likfsu,
         cisr,isrcodir,isrself,isrrelpos,isrecopos,isrterpos,israge,isrfem,isredu,isrdense,isrrelig,isrprefr,isrfsu,
         cmaf,mafcodir,mafself,mafrelpos,mafecopos,mafterpos,mafage,maffem,mafedu,mafdense,mafrelig,mafprefr,maffsu)
vote1<-as.data.frame(x)

#Multiples of seven
ind.resp <- seq(from=1,to=nrow(isr),by=7)

#Party positions and mean
parties <- cbind(isr$lik, isr$lab, isr$kad, isr$sha, isr$maf, isr$isr, isr$mer)
party.pos <- apply(parties, 2, mean, na.rm=TRUE)

#Descriptive stats of explanatory variables 
des.vars.all <- cbind(isr$terpos, isr$relpos, isr$ecopos, isr$age, isr$relig, isr$edu, isr$fem, isr$fsu)
des.vars <- des.vars.all[ind.resp,]
xtable(summary(des.vars))

####################################################################################
#Conditional Logit Model

#Model of Table 2 - Full Controls
mod1<-clogit(choice~dis+
               cmer+mercodir+merprefr+merself+merrelpos+merecopos+merterpos+merage+merfem+meredu+merdense+merfsu+merrelig+ #2-11
               clab+labcodir+labprefr+labself+labrelpos+labecopos+labterpos+labage+labfem+labedu+labdense+labfsu+labrelig+ #12-21
               csha+shacodir+shaprefr+shaself+sharelpos+shaecopos+shaterpos+shaage+shafem+shaedu+shadense+shafsu+sharelig+ #22-31
               clik+likcodir+likprefr+likself+likrelpos+likecopos+likterpos+likage+likfem+likedu+likdense+likfsu+likrelig+ #32-41
               cisr+isrcodir+isrprefr+isrself+isrrelpos+isrecopos+isrterpos+israge+isrfem+isredu+isrdense+isrfsu+isrrelig+ #42-51
               cmaf+mafcodir+mafprefr+mafself+mafrelpos+mafecopos+mafterpos+mafage+maffem+mafedu+mafdense+maffsu+mafrelig+ #52-61
               strata(count),data=vote1,method=c("exact"),na.action=na.omit)
mod1

stargazer(mod1)

####################################################################################################################
###Code for Figure 2 (on paper) and Figure 4 (on Accompanying Materials): 
###First part calculates phats for varing levels of coalition expectations
###second part calculates (via Delta method) standard error of phats, and
### thrid plots the results.

# Pull out relevant regression parameters
coefs = coef(mod1)

#Create Phats for each party by varying Coalition Expectations

#Set up Independent Variable
n<-200
expec<-seq(-100, 100, length=n)

one<-rep(1,times=200)
mone<-rep(-1,times=200)
zero<-rep(0,times=200)
Z<-cbind(zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero)

etamer<-matrix(nrow=200,ncol=3)
etalab<-matrix(nrow=200,ncol=3)
etasha<-matrix(nrow=200,ncol=3)
etalik<-matrix(nrow=200,ncol=3)
etaisr<-matrix(nrow=200,ncol=3)
etamaf<-matrix(nrow=200,ncol=3)

num <- function(x){
  as.numeric(x)
}

X1<-cbind(one,expec,rep(mean(prefr,na.rm=TRUE),times=200),one,rep(mean(num(relpos),na.rm=TRUE),times=200),
          rep(mean(num(ecopos),na.rm=TRUE),times=200),rep(mean(num(terpos),na.rm=TRUE),times=200),rep(mean(age,na.rm=TRUE),times=200),
          zero,rep(mean(edu,na.rm=TRUE),times=200),rep(mean(dense,na.rm=TRUE),times=200),zero,rep(mean(num(relig),na.rm=TRUE),times=200))
vals <- c(1,5,9)

#ETA - No Interaction
for(j in 1:length(vals)) {
  temp<-X1
  temp[,4]<-X1[,4]*vals[j]
  etamer[,j]<-cbind(one,temp,Z,Z,Z,Z,Z)%*%coefs
  etalab[,j]<-cbind(one,Z,temp,Z,Z,Z,Z)%*%coefs
  etasha[,j]<-cbind(one,Z,Z,temp,Z,Z,Z)%*%coefs
  etalik[,j]<-cbind(one,Z,Z,Z,temp,Z,Z)%*%coefs
  etaisr[,j]<-cbind(one,Z,Z,Z,Z,temp,Z)%*%coefs
  etamaf[,j]<-cbind(one,Z,Z,Z,Z,Z,temp)%*%coefs
}

#Create Phats
deno1<-rep(NA,times=200)
deno5<-rep(NA,times=200)
deno9<-rep(NA,times=200)
deno<-cbind(deno1,deno5,deno9)

# Multinomial logit link function
for(j in 1:3) {
  deno[,j]<-1+exp(etamer[,j])+exp(etalab[,j])+exp(etasha[,j])+
    exp(etalik[,j])+exp(etaisr[,j])+exp(etamaf[,j])
}

phat<-function(a,b){exp(a)/deno[,b]}

p_lab1<-phat(etalab[,1],1)
p_lab5<-phat(etalab[,2],2)
p_lab9<-phat(etalab[,3],3)
p_lab<-cbind(p_lab1,p_lab5,p_lab9)

p_lik1<-phat(etalik[,1],1)
p_lik5<-phat(etalik[,2],2)
p_lik9<-phat(etalik[,3],3)
p_lik<-cbind(p_lik1,p_lik5,p_lik9)

p_kad1<-1/deno[,1]
p_kad5<-1/deno[,2]
p_kad9<-1/deno[,3]
p_kad<-cbind(p_kad1,p_kad5,p_kad9)

##########################
#Substantive effects simulation

# Simulate additional parameter estimates from multivariate normal
set.seed(8668)
draws = mvrnorm(1000, coef(mod1), vcov(mod1))

five<-rep(5,times=200)

nine<-rep(9,times=200)

X5<-cbind(one,expec,rep(mean(prefr,na.rm=TRUE),times=200),five,rep(mean(num(relpos),na.rm=TRUE),times=200),
          rep(mean(num(ecopos),na.rm=TRUE),times=200),rep(mean(num(terpos),na.rm=TRUE),times=200),rep(mean(age,na.rm=TRUE),times=200),
          zero,rep(mean(edu,na.rm=TRUE),times=200),rep(mean(dense,na.rm=TRUE),times=200),zero,rep(mean(num(relig),na.rm=TRUE),times=200))

X9<-cbind(one,expec,rep(mean(prefr,na.rm=TRUE),times=200),nine,rep(mean(num(relpos),na.rm=TRUE),times=200),
          rep(mean(num(ecopos),na.rm=TRUE),times=200),rep(mean(num(terpos),na.rm=TRUE),times=200),rep(mean(age,na.rm=TRUE),times=200),
          zero,rep(mean(edu,na.rm=TRUE),times=200),rep(mean(dense,na.rm=TRUE),times=200),zero,rep(mean(num(relig),na.rm=TRUE),times=200))

simulation1.mer<-matrix(nrow=200,ncol=1000)
simulation1.lab<-matrix(nrow=200,ncol=1000)
simulation1.sha<-matrix(nrow=200,ncol=1000)
simulation1.lik<-matrix(nrow=200,ncol=1000)
simulation1.isr<-matrix(nrow=200,ncol=1000)
simulation1.maf<-matrix(nrow=200,ncol=1000)

simulation1.mer<-draws%*%t(cbind(one,X1,Z,Z,Z,Z,Z))
simulation1.lab<-draws%*%t(cbind(one,Z,X1,Z,Z,Z,Z))
simulation1.sha<-draws%*%t(cbind(one,Z,Z,X1,Z,Z,Z))
simulation1.lik<-draws%*%t(cbind(one,Z,Z,Z,X1,Z,Z))
simulation1.isr<-draws%*%t(cbind(one,Z,Z,Z,Z,X1,Z))
simulation1.maf<-draws%*%t(cbind(one,Z,Z,Z,Z,Z,X1))

simulation5.mer<-matrix(nrow=200,ncol=1000)
simulation5.lab<-matrix(nrow=200,ncol=1000)
simulation5.sha<-matrix(nrow=200,ncol=1000)
simulation5.lik<-matrix(nrow=200,ncol=1000)
simulation5.isr<-matrix(nrow=200,ncol=1000)
simulation5.maf<-matrix(nrow=200,ncol=1000)

simulation5.mer<-draws%*%t(cbind(one,X5,Z,Z,Z,Z,Z))
simulation5.lab<-draws%*%t(cbind(one,Z,X5,Z,Z,Z,Z))
simulation5.sha<-draws%*%t(cbind(one,Z,Z,X5,Z,Z,Z))
simulation5.lik<-draws%*%t(cbind(one,Z,Z,Z,X5,Z,Z))
simulation5.isr<-draws%*%t(cbind(one,Z,Z,Z,Z,X5,Z))
simulation5.maf<-draws%*%t(cbind(one,Z,Z,Z,Z,Z,X5))

simulation9.mer<-matrix(nrow=200,ncol=1000)
simulation9.lab<-matrix(nrow=200,ncol=1000)
simulation9.sha<-matrix(nrow=200,ncol=1000)
simulation9.lik<-matrix(nrow=200,ncol=1000)
simulation9.isr<-matrix(nrow=200,ncol=1000)
simulation9.maf<-matrix(nrow=200,ncol=1000)

simulation9.mer<-draws%*%t(cbind(one,X9,Z,Z,Z,Z,Z))
simulation9.lab<-draws%*%t(cbind(one,Z,X9,Z,Z,Z,Z))
simulation9.sha<-draws%*%t(cbind(one,Z,Z,X9,Z,Z,Z))
simulation9.lik<-draws%*%t(cbind(one,Z,Z,Z,X9,Z,Z))
simulation9.isr<-draws%*%t(cbind(one,Z,Z,Z,Z,X9,Z))
simulation9.maf<-draws%*%t(cbind(one,Z,Z,Z,Z,Z,X9))


sim.p_lab1<-phat(simulation1.lab,1)
sim.p_lab5<-phat(simulation5.lab,1)
sim.p_lab9<-phat(simulation9.lab,1)

sim.p_lik1<-phat(simulation1.lik,1)
sim.p_lik5<-phat(simulation5.lik,1)
sim.p_lik9<-phat(simulation9.lik,1)

deno1<-1+exp(simulation1.mer)+exp(simulation1.lab)+exp(simulation1.sha)+
    exp(simulation1.lik)+exp(simulation1.isr)+exp(simulation1.maf)

deno5<-1+exp(simulation5.mer)+exp(simulation5.lab)+exp(simulation5.sha)+
  exp(simulation5.lik)+exp(simulation5.isr)+exp(simulation5.maf)

deno9<-1+exp(simulation9.mer)+exp(simulation9.lab)+exp(simulation9.sha)+
  exp(simulation9.lik)+exp(simulation9.isr)+exp(simulation9.maf)

sim.p_kad1<-1/deno1
sim.p_kad5<-1/deno5
sim.p_kad9<-1/deno9


upr1Labor <- c(1:200)
lwr1Labor <- c(1:200)
upr1Likud <- c(1:200)
lwr1Likud <- c(1:200)
upr1Kadima <- c(1:200)
lwr1Kadima <- c(1:200)
upr5Labor <- c(1:200)
lwr5Labor <- c(1:200)
upr5Likud <- c(1:200)
lwr5Likud <- c(1:200)
upr5Kadima <- c(1:200)
lwr5Kadima <- c(1:200)
upr9Labor <- c(1:200)
lwr9Labor <- c(1:200)
upr9Likud <- c(1:200)
lwr9Likud <- c(1:200)
upr9Kadima <- c(1:200)
lwr9Kadima <- c(1:200)


for(i in 1:200){
upr1Labor[i] <- quantile(sim.p_lab1[,i], .975, na.rm=TRUE)
lwr1Labor[i] <- quantile(sim.p_lab1[,i], .025, na.rm=TRUE)
upr1Likud[i] <- quantile(sim.p_lik1[,i], .975, na.rm=TRUE)
lwr1Likud[i] <- quantile(sim.p_lik1[,i], .025, na.rm=TRUE)
upr1Kadima[i] <- quantile(sim.p_kad1[,i], .975, na.rm=TRUE)
lwr1Kadima[i] <- quantile(sim.p_kad1[,i], .025, na.rm=TRUE)
upr5Labor[i] <- quantile(sim.p_lab5[,i], .975, na.rm=TRUE)
lwr5Labor[i] <- quantile(sim.p_lab5[,i], .025, na.rm=TRUE)
upr5Likud[i] <- quantile(sim.p_lik5[,i], .975, na.rm=TRUE)
lwr5Likud[i] <- quantile(sim.p_lik5[,i], .025, na.rm=TRUE)
upr5Kadima[i] <- quantile(sim.p_kad5[,i], .975, na.rm=TRUE)
lwr5Kadima[i] <- quantile(sim.p_kad5[,i], .025, na.rm=TRUE)
upr9Labor[i] <- quantile(sim.p_lab9[,i], .975, na.rm=TRUE)
lwr9Labor[i] <- quantile(sim.p_lab9[,i], .025, na.rm=TRUE)
upr9Likud[i] <- quantile(sim.p_lik9[,i], .975, na.rm=TRUE)
lwr9Likud[i] <- quantile(sim.p_lik9[,i], .025, na.rm=TRUE)
upr9Kadima[i] <- quantile(sim.p_kad9[,i], .975, na.rm=TRUE)
lwr9Kadima[i] <- quantile(sim.p_kad9[,i], .025, na.rm=TRUE)
}


##### Making graphs
tmpLabor=ggplot() +
  # Lets add the confidence intervals to our plot
  geom_ribbon(aes(fill="scenLeft", ymin=lwr1Labor, ymax=upr1Labor, x=expec), alpha=0.3) +
  geom_ribbon(aes(fill="scenCenter", ymin=lwr5Labor, ymax=upr5Labor, x=expec), alpha=0.3) +
  geom_ribbon(aes(fill="scenRight", ymin=lwr9Labor, ymax=upr9Labor, x=expec), alpha=0.3) +
  geom_hline(yintercept=1, color='red', linetype='dashed') +
  # Plotting our predicted values
  geom_line(aes(x=expec, y=p_lab1, color="scenLeft")) +
  geom_line(aes(x=expec, y=p_lab5, color="scenCenter")) +
  geom_line(aes(x=expec, y=p_lab9, color="scenRight")) +
  # Relabel legend items
  scale_color_discrete(breaks=c("scenLeft","scenCenter", "scenRight"), labels=c('Left', 'Center', 'Right')) +
  #Make title
  ggtitle("Labor")+
  # Relabel y-axis
  scale_y_continuous(name='Probability of voting for Labor') + 
  # Relabel x-axis
  scale_x_continuous(name='Coalition Expectation') +
  # Remove remaining legends
  scale_fill_discrete(guide='none') +
  # Clean up plot
  theme(legend.position='top', legend.title=element_blank(), 
      axis.ticks=element_blank(), panel.border=element_blank(),
      axis.title.y=element_text(vjust=2))

tmpLabor

tmpLikud=ggplot() +
  # Lets add the confidence intervals to our plot
  geom_ribbon(aes(fill="scenLeft", ymin=lwr1Likud, ymax=upr1Likud, x=expec), alpha=0.3) +
  geom_ribbon(aes(fill="scenCenter", ymin=lwr5Likud, ymax=upr5Likud, x=expec), alpha=0.3) +
  geom_ribbon(aes(fill="scenRight", ymin=lwr9Likud, ymax=upr9Likud, x=expec), alpha=0.3) +
  geom_hline(yintercept=1, color='red', linetype='dashed') +
  # Plotting our predicted values
  geom_line(aes(x=expec, y=p_lik1, color="scenLeft")) +
  geom_line(aes(x=expec, y=p_lik5, color="scenCenter")) +
  geom_line(aes(x=expec, y=p_lik9, color="scenRight")) +
  # Relabel legend items
  scale_color_discrete(breaks=c("scenLeft","scenCenter", "scenRight"), labels=c('Left', 'Center', 'Right')) +
  #Make title
  ggtitle("Likud")+
  # Relabel y-axis
  scale_y_continuous(name='Probability of voting for Likud') + 
  # Relabel x-axis
  scale_x_continuous(name='Coalition Expectation') +
  # Remove remaining legends
  scale_fill_discrete(guide='none') +
  # Clean up plot
  theme(legend.position='top', legend.title=element_blank(), 
        axis.ticks=element_blank(), panel.border=element_blank(),
        axis.title.y=element_text(vjust=2))

tmpLikud

tmpKadima=ggplot() +
  # Lets add the confidence intervals to our plot
  geom_ribbon(aes(fill="scenLeft", ymin=lwr1Kadima, ymax=upr1Kadima, x=expec), alpha=0.3) +
  geom_ribbon(aes(fill="scenCenter", ymin=lwr5Kadima, ymax=upr5Kadima, x=expec), alpha=0.3) +
  geom_ribbon(aes(fill="scenRight", ymin=lwr9Kadima, ymax=upr9Kadima, x=expec), alpha=0.3) +
  geom_hline(yintercept=1, color='red', linetype='dashed') +
  # Plotting our predicted values
  geom_line(aes(x=expec, y=p_kad1, color="scenLeft")) +
  geom_line(aes(x=expec, y=p_kad5, color="scenCenter")) +
  geom_line(aes(x=expec, y=p_kad9, color="scenRight")) +
  # Relabel legend items
  scale_color_discrete(breaks=c("scenLeft","scenCenter", "scenRight"), labels=c('Left', 'Center', 'Right')) +
  #Make title
  ggtitle("Kadima")+
  # Relabel y-axis
  scale_y_continuous(name='Probability of voting for Kadima', limits=c(0,1)) + 
  # Relabel x-axis
  scale_x_continuous(name='Coalition Expectation') +
  # Remove remaining legends
  scale_fill_discrete(guide='none') +
  # Clean up plot
  theme(legend.position='top', legend.title=element_blank(), 
        axis.ticks=element_blank(), panel.border=element_blank(),
        axis.title.y=element_text(vjust=2))

tmpKadima

# Run a loop that defines each of the subsamples as the testsample which the model is evaluated against
for (i in 1:7){
  test=ms[ms$fold==i,]
  train=ms[ms$fold!=i,]
  train.mod=glm(deaths~sanctions, data=train, family=binomial(link=logit))
  train.results = cbind(coef(train.mod), se.coef(train.mod), fold=rep(i,2))
  coefCrossVal=rbind(coefCrossVal, train.results)
  predVals=coef(train.mod) %*% t(data.matrix(cbind(1, test$sanctions)))
  plotdata <- rbind(plotdata, cbind(predProbs = as.numeric(1/(1+exp(-predVals))), dv = ms[ms$fold == i, 'deaths'],i))
  perf <- c(perf,rmse(predVals,test$deaths))
}
# 

#Relevant explanatory variables
vars <- cbind(choice, dis,
                    cmer,mercodir,merprefr,merself,merrelpos,merecopos,merterpos,merage,merfem,meredu,merdense,merfsu,merrelig,
                    clab,labcodir,labprefr,labself,labrelpos,labecopos,labterpos,labage,labfem,labedu,labdense,labfsu,labrelig,
                    csha,shacodir,shaprefr,shaself,sharelpos,shaecopos,shaterpos,shaage,shafem,shaedu,shadense,shafsu,sharelig,
                    clik,likcodir,likprefr,likself,likrelpos,likecopos,likterpos,likage,likfem,likedu,likdense,likfsu,likrelig,
                    cisr,isrcodir,isrprefr,isrself,isrrelpos,isrecopos,isrterpos,israge,isrfem,isredu,isrdense,isrfsu,isrrelig,
                    cmaf,mafcodir,mafprefr,mafself,mafrelpos,mafecopos,mafterpos,mafage,maffem,mafedu,mafdense,maffsu,mafrelig)
vars <- data.frame(na.omit(vars))

#Subset by party
mer.data <- vars[which(vars$cmer==1),]
lab.data <- vars[which(vars$clab==1),]
sha.data <- vars[which(vars$csha==1),]
lik.data <- vars[which(vars$clik==1),]
isr.data <- vars[which(vars$cisr==1),]
maf.data <- vars[which(vars$cmaf==1),]
kad.data <- vars[which(vars$cmer!=1 & vars$clab!=1 & vars$csha!=1 & vars$clik!=1 & vars$cisr!=1 & vars$cmaf!=1),]

#Separationplots for each party
pred.value.mer <-  coefs%*%t(mer.data[,2:80])
mer.predProbs = as.numeric(plogis(pred.value.mer))
separationplot(actual=mer.data$choice, pred = mer.predProbs, heading = "Meretz")

pred.value.lab <-  coefs%*%t(lab.data[,2:80])
lab.predProbs = as.numeric(plogis(pred.value.lab))
separationplot(actual=lab.data$choice, pred = lab.predProbs, heading = "Labor")

pred.value.sha <-  coefs%*%t(sha.data[,2:80])
sha.predProbs = as.numeric(plogis(pred.value.sha))
separationplot(actual=sha.data$choice, pred = sha.predProbs, heading = "Shas")

pred.value.lik <-  coefs%*%t(lik.data[,2:80])
lik.predProbs = as.numeric(plogis(pred.value.lik))
separationplot(actual=lik.data$choice, pred = lik.predProbs, heading = "Likud")

pred.value.isr <-  coefs%*%t(isr.data[,2:80])
isr.predProbs = as.numeric(plogis(pred.value.isr))
separationplot(actual=isr.data$choice, pred = isr.predProbs, heading = "Israel Beitenu")

pred.value.maf <-  coefs%*%t(maf.data[,2:80])
maf.predProbs = as.numeric(plogis(pred.value.maf))
separationplot(actual=maf.data$choice, pred = maf.predProbs, heading = "Mafdal")

pred.value.kad <-  coefs%*%t(kad.data[,2:80])
kad.predProbs = as.numeric(plogis(pred.value.kad))
separationplot(actual=kad.data$choice, pred = kad.predProbs, heading = "Kadima")

################## 
#Cross-Validation
#################

#Divide the INES data set into 5 samples by count
random.sequence <- sample(1:5, length(unique(count)), replace=TRUE)
isr$random = NA

#First respondent
isr$random[1] <-random.sequence[1]
isr$random[2] <-random.sequence[1]
isr$random[3] <-random.sequence[1]
isr$random[4] <-random.sequence[1]
isr$random[5] <-random.sequence[1]
isr$random[6] <-random.sequence[1]
isr$random[7] <-random.sequence[1]

#All subsequent respondents (multiples of 7)
for (i in 1:1263){
isr$random[7*i+1] <- random.sequence[i+1]
isr$random[7*i+2] <- random.sequence[i+1]
isr$random[7*i+3] <- random.sequence[i+1]
isr$random[7*i+4] <- random.sequence[i+1]
isr$random[7*i+5] <- random.sequence[i+1]
isr$random[7*i+6] <- random.sequence[i+1]
isr$random[7*i+7] <- random.sequence[i+1]
}

#Check it looks right
table(isr$random)

#Create a function for the root mean squared error
rmse = function (preds, actual){sqrt(mean(preds-actual)^2)}

#Create place holder objects
coefCrossVal = NULL
perf=NULL

#Identify the relevant variables
which(names(coef(mod1))=="labcodir")
which(names(coef(mod1))=="likcodir")

# Run a loop that defines each of the subsamples as the testsample which the model is evaluated against
for (i in 1:5){
  print(i)
  test=isr[isr$random==i,]
  train=isr[isr$random!=i,]
  train.mod<-clogit(choice~dis+
                      cmer+mercodir+merprefr+merself+merrelpos+merecopos+merterpos+merage+merfem+meredu+merdense+merfsu+merrelig+ #2-11
                      clab+labcodir+labprefr+labself+labrelpos+labecopos+labterpos+labage+labfem+labedu+labdense+labfsu+labrelig+ #12-21
                      csha+shacodir+shaprefr+shaself+sharelpos+shaecopos+shaterpos+shaage+shafem+shaedu+shadense+shafsu+sharelig+ #22-31
                      clik+likcodir+likprefr+likself+likrelpos+likecopos+likterpos+likage+likfem+likedu+likdense+likfsu+likrelig+ #32-41
                      cisr+isrcodir+isrprefr+isrself+isrrelpos+isrecopos+isrterpos+israge+isrfem+isredu+isrdense+isrfsu+isrrelig+ #42-51
                      cmaf+mafcodir+mafprefr+mafself+mafrelpos+mafecopos+mafterpos+mafage+maffem+mafedu+mafdense+maffsu+mafrelig+ #52-61
                      strata(count),data=train,method=c("exact"),na.action=na.omit)
  train.results.lab = cbind(coef(train.mod)[16], ((summary(train.mod)$coef)[,3])[16], fold=i)
  train.results.lik = cbind(coef(train.mod)[42], ((summary(train.mod)$coef)[,3])[42], fold=i)
  coefCrossVal=rbind(coefCrossVal, train.results.lab, train.results.lik)
}


plotData.codir <- data.frame(cbind(x.axis = coefCrossVal[,3], y.mean = coefCrossVal[,1], y.se = coefCrossVal[,2], 
                                      y.min=(coefCrossVal[,1]-1.96*coefCrossVal[,2]), y.max=(coefCrossVal[,1]+1.96*coefCrossVal[,2]), rep(c("Labor","Likud"),5))) 
for(ii in 2:5){
  plotData.codir[,ii] = as.numeric(as.character(plotData.codir[,ii]))
}

#Plot the figure
tmp2 = ggplot(plotData.codir, aes(
  x=x.axis, y=y.mean, ymin=y.min, ymax=y.max, color=as.factor(V6)))
tmp2 = tmp2 + geom_hline(yintercept=0, color='red', linetype='dashed')
tmp2 = tmp2 + geom_linerange(position=position_dodge(width=1/2))
tmp2 = tmp2 + geom_pointrange(position=position_dodge(width=1/2))
tmp2 = tmp2 + ylab("Coalition expectation") + xlab("Fold")
tmp2 = tmp2 + coord_flip() + theme_bw()
tmp2 = tmp2 + theme(legend.title=element_blank()) #suppress legend title
tmp2 = tmp2 + theme(legend.position = c(.8, .8))

tmp2 

#Separate plots for each party
lab.coef.cross <- coefCrossVal[c(1,3,5,7,9),]

plotData.labcodir <- data.frame(cbind(x.axis = lab.coef.cross[,3], y.mean = lab.coef.cross[,1], y.se = lab.coef.cross[,2], 
                                      y.min=(lab.coef.cross[,1]-1.96*lab.coef.cross[,2]), y.max=(lab.coef.cross[,1]+1.96*lab.coef.cross[,2]))) 

#Plot data
labcodir.plot <- ggplot(plotData.labcodir, aes(x=plotData.labcodir$x.axis, y=plotData.labcodir$y.mean)) +
  geom_errorbar(aes(ymin=plotData.labcodir$y.min, ymax=plotData.labcodir$y.max, width=.1)) +
  geom_point()+
  geom_line(aes(x=x.axis, y=0), col="red", )+
  ggtitle("Labor")+
  scale_y_continuous(name="Coalition Expectation") +
  scale_x_continuous(name='Random subsamples', breaks=(1:5))+
  coord_flip() 

labcodir.plot

lik.coef.cross <- coefCrossVal[c(2,4,6,8,10),]

plotData.likcodir <- data.frame(cbind(x.axis = lik.coef.cross[,3], y.mean = lik.coef.cross[,1], y.se = lik.coef.cross[,2], 
                                      y.min=(lik.coef.cross[,1]-1.96*lik.coef.cross[,2]), y.max=(lik.coef.cross[,1]+1.96*lik.coef.cross[,2]))) 

#Plot data
likcodir.plot <- ggplot(plotData.likcodir, aes(x=plotData.likcodir$x.axis, y=plotData.likcodir$y.mean)) +
  geom_errorbar(aes(ymin=plotData.likcodir$y.min, ymax=plotData.likcodir$y.max, width=.1)) +
  geom_point()+
  geom_hline(yintercept=0, color='red', linetype='dashed') +
  ggtitle("Likud")+
  scale_y_continuous(name="Coalition Expectation") +
  scale_x_continuous(name='Random subsamples', breaks=(1:5)) +
  coord_flip() 

likcodir.plot

#########################
#Coefficient plot
############################

coefs <- coef(mod1) 
std.errors <- (summary(mod1)$coef)[,3]
lower95 <- coefs-1.96*std.errors
upper95 <- coefs+1.96*std.errors
party.name <- c("Across parties", rep("Meretz", 13), rep("Labor", 13), rep("Shas", 13), rep("Likud", 13), rep("Isr. Beinetu", 13), rep("Mafdal", 13) ) 
var.name <- c("Distance", rep(c("(Intercept)","Expected coalition", "Coalition preference", "Left-right position", "State-religion", "Size of government", 
                                "Territories", "Age","Female","Education","Housing density","FSU immigrant", "Religiosity"),6))

ggCoef <- data.frame(cbind(coefs, std.errors, lower95, upper95, party.name, var.name))
which(ggCoef$var.name == "(Intercept)")
which(ggCoef$var.name == "Coalition preference" & ggCoef$party.name == "Meretz")
ggCoef <- ggCoef[c(1,3,5:14,2,16:27,15,29:40,28,42:53,41,55:66,54,68:79,67),]
ggCoef

for(ii in 1:4){
  ggCoef[,ii] = as.numeric(as.character(ggCoef[,ii]))
}

subset.ggCoef <- ggCoef[which(ggCoef$var.name %in% c("Left-right position", "Coalition preference", "Expected coalition", "Distance")),]
subset.ggCoef$var.name <- factor(subset.ggCoef$var.name, levels= c("Left-right position", "Coalition preference", "Expected coalition", "Distance")) 

tmp1 = ggplot(subset.ggCoef, aes(
  x=var.name, y=coefs, ymin=lower95, ymax=upper95, color=as.factor(party.name)))
tmp1 = tmp1 + geom_hline(yintercept=0, color='red', linetype='dashed')
tmp1 = tmp1 + geom_linerange(position=position_dodge(width=1/2))
tmp1 = tmp1 + geom_pointrange(position=position_dodge(width=1/2))
tmp1 = tmp1 + ylab("Parameter Estimate") + xlab("")
tmp1 = tmp1 + coord_flip() + theme_bw()
tmp1 = tmp1 + theme(legend.title=element_blank()) #suppress legend title
tmp1 = tmp1 + theme(legend.position = c(.75, .75))
